title: “TissueSpecific” output: html_document —
To get an idea of the presence of tissue specifics, we can look at the prior weight on the tissue specific configuraitons.
colnames(lfsr)=colnames(b.stat);
thresh=0.05
dist=as.matrix(lfsr)<=thresh
ones=which(rowSums(dist)==1)
thresh=0.05
barplot(apply(lfsr[which(rowSums(lfsr<=thresh)==1),],2,function(x){sum(x<=thresh)}),las=2,cex.names=0.5,main=paste0("Number of eQTL with LFSR<",0.05," in Single Tissue"),ylim=c(0,420))
zthresh=1
barplot(apply(maxb[which(rowSums(maxb>zthresh)==1),],2,function(x){sum(x>zthresh)}),las=2,cex.names=0.5,main=paste0("Number of eQTL with B>",zthresh,"in Single Tissue"))
pis=readRDS("../Data/pisAug13withED.rds")$pihat
pi.mat=matrix(pis,ncol=54,byrow=T)
num.mat=matrix(seq(1:length(covmat)),ncol=54,byrow=TRUE)
#num.mat=matrix(seq(1:1188),ncol=54,byrow=TRUE)
omegas=round(sapply(num.mat[,1],function(x){covmat[[x]][1]}),3)
pis=readRDS("../Data/pisAug13withED.rds")$pihat
singleton.weights=pis[sapply(covmat,function(x){sum(diag(x)!=0)==1})]
sum(colSums(pi.mat[,10:53]))
## [1] 0.005241691
We might say that only 0.0052417 percent of genes demonstrate tissue specificity.
And how about the ‘completely consistent’?
#singleton.weights=pis[sapply(covmat,function(x){sum(diag(x)!=0)==1})]
colSums(pi.mat)[54]
## [1] 0.02572196
Let’s understand qualitatively how matrix ash affects tissue specific effects:
First, we plot the ‘singletons’ of the ‘noisy zstats’
library(qtlcharts)
id=colnames(maxb)
iplotCurves(as.matrix(maxb)[ones,],1:44,chartOpts=list(xlab="Tissue Indices", ylab="B_mle",pointcolor=c("slateblue")))
## Set screen size to height=700 x width=1000
#barplot(as.matrix(maxb)[ones,],las=2,colnames=colnames(maxb))
#barplot(as.matrix(posterior.means)[ones,],las=2,names=colnames(maxb))
Now, we plot the matrix-ash shrunken zstats. Look how clean!
iplotCurves(as.matrix(posterior.means[ones,]),1:44,chartOpts=list(xlab="Tissue Indices", ylab="E(B|Data)",pointcolor=c("slateblue")))
Pull out tissue-specifics; examine examples.
Here’s an example:
j=which(rownames(b.stat)=="ENSG00000244171.3_3_142895174_T_G_b37")
barfunc(j)
Let’s examine ENSG00000244171.3_3_142895174_T_G_b37 do discover his function:
pre-B-cell leukemia homeobox 2 pseudogene 1 [Source:HGNC Symbol;Acc:HGNC:8635]
We can find all the ones who have high loading on the ‘tissue specific’ configs:
pw.singles=post.weights[,sapply(covmat,function(x){sum(diag(x)!=0)==1})]
singleton.rank=rowSums(pw.singles)
j=which.max(singleton.rank)
barfunc(j)
This particular gene is Desmoplakin, known to be involved in dilated cardiomyopathy.
pw.all=post.weights[,sapply(covmat,function(x){sum(diag(x)/max(diag(x)))/sum(x[1,2]!=0)==44})]
cons.rank=rowSums(pw.all[,6:22])
j=which.max(cons.rank)
barfunc(j)
Some tissue specific examples:
library("qtlcharts")
plot_ts("Testis",lfsr, b.stat,0.05)
plot_ts("Testis",lfsr, posterior.means,0.05)
plot_ts("Whole_Blood",lfsr, b.stat,0.05)
plot_ts("Whole_Blood",lfsr, posterior.means,0.05)
plot_ts("Muscle_Skeletal",lfsr,b.stat,0.05)
plot_ts("Liver",lfsr, b.stat,0.05)
plot_ts("Liver",lfsr, posterior.means,0.05)
plot_ts("Lung",lfsr, b.stat,0.05)
plot_ts("Lung",lfsr, posterior.means,0.05)
plot_ts("Lung",lfsr, b.stat,0.05)
plot_ts("Lung",lfsr, posterior.means,0.05)
plot_ts("Muscle_Skeletal" ,lfsr, b.stat,0.05)
plot_ts("Muscle_Skeletal" ,lfsr, posterior.means,0.05)
clean_names=sapply(rownames(maxb),function(x){ strsplit(x, "[.]")[[1]][1]})
liver=which(clean_names=="ENSG00000166923")
barfunc(genename = liver)
msk=which(clean_names=="ENSG00000092054")
barfunc(genename = msk)
lung=which(clean_names=="ENSG00000151468")
barfunc(genename = lung)
testes=which(rownames(maxb)=="ENSG00000131848.5_19_56741808_G_A_b37")
##potassium channel subfamily M regulatory beta subunit 4
barfunc(genename = testes)
msk=(which(rownames(maxb)=="ENSG00000103316.6_16_21277438_C_T_b37"))
barfunc(genename = msk)
##crystallin
ms3=which(clean_names=="ENSG00000152601")
barfunc(genename = ms3)
##ENSG00000152601 muscleblind-like (Drosophila)
ms4=which(clean_names=="ENSG00000176658")
barfunc(genename = ms4)
##ENSG00000152601 muscleblind-like (Drosophila)
thyroid=(which(rownames(maxb)=="ENSG00000075275.12_22_46859003_G_C_b37"))
##cadherin, EGF LAG seven-pass G-type receptor 1
barfunc(genename = thyroid)
adrenal=(which(rownames(maxb)=="ENSG00000135643.4_12_70965479_A_G_b37"))
barfunc(genename = adrenal)
#potassium channel subfamily M regulatory beta subunit 4
wholebloodone=(which(rownames(maxb)=="ENSG00000156110.9_10_75928933_T_C_b37"))
barfunc(genename = wholebloodone)
#Adenosine kinase (HGNC Symbol)
#Entrez gene summary
#This gene an enzyme which catalyzes the transfer of the gamma-phosphate from ATP to adenosine, thereby serving as a regulator of concentrations of both extracellular adenosine and intracellular adenine nucleotides. Adenosine has widespread effects on the cardiovascular, nervous, respiratory, and immune systems and inhibitors of the enzyme could play an important pharmacological role in increasing intravascular adenosine concentrations and acting as anti-inflammatory agents. Multiple transcript variants encoding different isoforms have been found for this gene. [provided by RefSeq, Jan 2011]"
wholebloodtwo=(which(rownames(maxb)=="ENSG00000078114.14_10_21501314_T_C_b37"))
barfunc(genename = wholebloodtwo)
#Experiment name Transcription profiling of human blood taken from individuals fed an antioxidant-rich diet or a diet containing kiwi-fruit to evaluate the effect on gene expression with emphasis on stress and repair related processes [E-MEXP-2030]
# Source ArrayExpress
# Experiment description Plant-based diets rich in fruit and vegetables can prevent development of several chronic age-related diseases such as cancer and cardiovascular diseases. The mechanisms behind this protective effect is not elucidated. We have studied whether a specially designed antioxidant-rich food diet and a kiwi-fruit intervention can affect whole genome expression in human blood cells with emphasis on stress and repair related process.
# Chips
# Chip ID: OAS12-1 - Annotation by Bgee curators: Anatomical structure ID: EV:0100047 - Developmental stage ID: HsapDO:0000148
# Probeset ID: 203961_at - Mapped to gene: ENSG00000078114 - Detection flag: absent - Quality: high quality
wholebloodthree=(which(rownames(maxb)=="ENSG00000181722.11_3_114267837_A_G_b37"))
lfsr.ash=read.table("../Comparison with Univariate Ash//univariate.ash.lfsr.txt")
lfsr.mash=read.table("../Data/Aug13withEDlfsr.txt")[,-1]
t=wholebloodthree
par(mfrow=c(1,3))
b=barplot(as.numeric(maxb[t,]),main=paste0("B.MLE,ENSG00000181722"),cex.names=0.5,las=2,ylab="B.mle",names=colnames(maxb),ylim=c(-0.2,.2))
b=barplot(as.numeric(pm.mash.beta[t,]),main=paste0("E(B|EB.mash),ENSG00000181722"),col=col.func(t,lfsr=lfsr.mash,posterior.means=posterior.means),cex.names=0.5,las=2,ylab="PosteriorMean",names=colnames(posterior.means),ylim=c(-0.2,0.2))
legend("top",c("lfsr>0.5","0.5>lfsr>0.05","lfsr<0.05"),col=c("red","orange","green"),pch=20)
b=barplot(as.numeric(pm.ash.beta[t,]),main=paste0("E(B|EB_ash),ENSG00000181722"),col=col.func(t,lfsr=lfsr.ash,posterior.means=posterior.means),cex.names=0.5,las=2,ylab="PosteriorMean",names=colnames(posterior.means),ylim=c(-0.2,0.2));
legend("top",c("lfsr>0.5","0.5>lfsr>0.05","lfsr<0.05"),col=c("red","orange","green"),pch=20)
barfunc(genename = wholebloodthree)
We also denoise and shrink the consistent effects away from 0.
plot_tc(lfsr,curvedata = maxb,thresh = 0.00005)
plot_tc(lfsr,curvedata = posterior.means,thresh = 0.00005)